home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
17 Bit Software 6: Level 6
/
17 Bit - Level 6 (1998)(Epic Marketing)[!].iso
/
!applications!
/
stc4102
/
sources
/
optidec.s
< prev
Wrap
Text File
|
1993-12-06
|
4KB
|
190 lines
;-----------------------------------------------------------------------------
;S404 highly optimized data_decruncher_lib 1.0_turbo for use in stc.library
;23.11.93 by Marcus 'Cozine' Ottosson
;
; This is a highly optimized library decruncher. The code does not fit into
; the instruction cache (286 bytes). Generally, it's NOT possible to use all
; the 256 bytes available in the instruction cache. The caches in the 68020
; and the 68030 consist of 16 rows, each of which contains 16 bytes.
; AllocMem() and AllocPooled(), the memory allocation routines used for
; loading executables, guarantee 8-byte alignment.
; On the 68000 my large testfile decrunches 3.7% faster in this version than
; the standard version. Some files may decrunch much slower on the 68020 and
; the 68030. I don't know the maximum loss, probably about 10-20%. The loss
; will not be very high when we are multitasking, since the interrupts cause a
; partial or complete cache flush anyway. I want some test reports!!!
;
;-----------------------------------------------------------------------------
;S404 data_decruncher v0.2
;(c) 1993 by Jouni 'Mr.Spiv' Korhonen (SWSW)
;-----------------------------------------------------------------------------
;call with registers: a2 = destination address
; a1 = crunched data
;-----------------------------------------------------------------------------
;Uses d0-d7/a0-a6
;-----------------------------------------------------------------------------
; decrunch - entry point of the S404 data decruncher.
; In:   a1 = crunched data, a2 = destination address.
; The output is written backwards with -(a2), from the end of the destination
; down to its start (kept in a5). The input is consumed backwards as 16-bit
; words with -(a1).
; Register roles:
;   d0 = constant 1 (operand of addx)
;   d1 = scratch; bit count going into lbits, extracted value coming out
;   d2 = bit count of the far-offset field (loaded from the crunched data)
;   d3 = dbf counter for match copies and literal runs
;   d4 = $0000ffff, mask that clears the upper word of the bit buffer
;   d5 = constant 16 (bits per input word)
;   d6 = bit buffer (low word); bits are shifted out at the top
;   d7 = bit counter for d6; the dbcc after each field falls through to a
;        refill when the buffer is exhausted
;   a0 = copy source, a3 = address of loff6, a4 = address of llen5a
;   a5 = destination start, a6 = continuation address jumped to by lbits
; Nothing is saved or restored: d0-d7/a0-a6 are all overwritten.
;-----------------------------------------------------------------------------
decrunch:
l0: addq #8,a1 ; step over the first 8 bytes of the crunched block (not read here)
edDCopyableStart: ; NOTE(review): not referenced in the visible source - presumably used externally; confirm
move.l a2,a5 ; a5 = destination start = stop address of the backwards write
add.l (a1)+,a2 ; a2 += longword from the data: a2 = end of the destination
add.l (a1),a1 ; a1 += longword at (a1): a1 = point the input is read back from
edDCopyableStart2: ; NOTE(review): not referenced in the visible source; confirm external use
moveq #0,d4
moveq #16,d5 ; d5 = 16, added to d7 in lbits after a refill
movem (a1),d2/d6/d7 ; three words: d2 = far-offset bit count, d6 = first bit buffer, d7 = its counter
not d4 ; word-sized not: d4 = $0000ffff
lea loff6(pc),a3 ; continuation after an offset field has been read
lea llen5a(pc),a4 ; continuation after a 2-bit/8-bit length field has been read
moveq #1,d0
moveq #-1,d3 ; d3.w = -1, the same value every finished dbf d3 loop leaves behind
bra.s ltest1 ; enter the main loop at its end test
cnop 0,8 ; Use if main loop>=244 bytes
;*** Here's the start of the instruction cache
; --- literal byte: the next 8 bits of the stream are one output byte ---
lins: subq #8,d7 ; take 8 bits off the counter
bpl.s lins2 ; all 8 bits are still in d6
lins1: move d7,d1 ; byte straddles two input words: d1 = -(missing bits)
addq #8,d7 ; d7 = bits still available in d6
lsl.l d7,d6 ; move them into the upper word
move -(a1),d6 ; fetch the next input word into the low word
neg d1 ; d1 = number of missing bits
lsl.l d1,d6 ; shift those in as well; byte is now in the low byte of the upper word
addq #8,d7 ; counter for the freshly loaded word
swap d6
move.b d6,-(a2) ; store the literal
swap d6 ; bit buffer back into the low word
cmp.l a2,a5
dbhs d7,lmain ; continue while a2 is above a5 and bits remain
bra.s lexma ; finished or buffer empty: lexma sorts it out
lins2: rol #8,d6 ; rotate the top 8 bits into the low byte
move.b d6,-(a2) ; store the literal
ltest1: cmp.l a2,a5 ; HS (carry clear) once a2 has come down to a5
dbhs d7,lmain ; not finished and bits remain: decode next token
lexma bhs.s lexit ; dbcc leaves the flags of the cmp intact: HS = output complete
lmain1: move -(a1),d6 ; bit buffer empty: load the next input word
moveq #15,d7
; --- token decode: control bit 0 = literal byte, 1 = length/offset codes ---
lmain: add d6,d6 ; shift the next control bit into carry
bcc.s lins ; 0: literal byte
dbf d7,llen1 ; count the bit; fall through to refill when the buffer is empty
move -(a1),d6
moveq #15,d7
llen1: add d6,d6
bcs.s llen6 ; 1: shortest match, one more bit selects its length
moveq #2,d1 ; prepare a 2-bit length field ...
moveq #4-2,d3 ; ... with base count 2 (lcopy moves d3+2 bytes in total)
dbf d7,llen2
move -(a1),d6
moveq #15,d7
llen2: add d6,d6
bcs.s llen5 ; 1: read the 2-bit field
dbf d7,llen3
move -(a1),d6
moveq #15,d7
llen3: add d6,d6
bcc.s llen4 ; 0: long match, length built from 8-bit fields
moveq #4,d1 ; 1: 4-bit length field ...
moveq #8-2,d3 ; ... with base count 6
lea llen3a(pc),a6
bra.s lbits
llen3a: add d1,d3 ; d3 = 6 + field value
cmp #15,d1
blo.s loff1 ; values 0-14: ordinary match, go read its offset
moveq #5,d1 ; value 15 is an escape: a run of literal bytes follows,
moveq #14-1,d3 ; counted by a 5-bit field with base 13
lea llen3b(pc),a6
bra.s lbits
llen4: moveq #23-2,d3 ; base count 21 for the long match
lloop: moveq #8,d1 ; read one 8-bit length field
llen5: move.l a4,a6 ; continue at llen5a
bra.s lbits
llen5a: add d1,d3 ; accumulate the field into the count
not.b d1 ; Z set only when the low byte of the field was $ff
dbeq d7,loff2 ; field <> $ff: length complete, go read the offset (if bits remain)
bne.s loff2a ; length complete but buffer empty: refill first
bra.s lloop ; field = $ff: another 8-bit field extends the length
; --- match copy: a0 = base + offset value, source lies above a2 ---
loff6: add d1,a0 ; add the offset value returned by lbits
move.b (a0),-(a2) ; first byte of the match
lcopy: move.b -(a0),-(a2) ; remaining d3+1 bytes, copied downwards
dbf d3,lcopy ; leaves d3.w = -1
ltest: cmp.l a2,a5
dbhs d7,lmain ; not finished and bits remain: next token
blo.s lmain1 ; not finished, buffer empty: refill
lexit: rts ; a2 has reached a5: output complete
llen6: dbf d7,llen7
move -(a1),d6
moveq #15,d7
llen7: add d6,d6 ; length-select bit -> X
addx d0,d3 ; d3 = d3 + 1 + X; d3.w is -1 here, so d3.w = the bit just read
loff1: dbf d7,loff2
loff2a: move -(a1),d6
moveq #15,d7
; --- offset decode: 1 = d2-bit field, 00 = 9-bit field, 01 = 5-bit field ---
loff2: add d6,d6
bcs.s loff3 ; 1: far offset
dbf d7,loff4
move -(a1),d6
moveq #15,d7
loff4: moveq #9,d1 ; assume the 9-bit field, based at a2+32
lea 32(a2),a0
add d6,d6
bcc.s loff5 ; 0: keep the 9-bit setup
moveq #5,d1 ; 1: 5-bit field, based at a2
move.l a2,a0
bra.s loff5
loff3: lea 544(a2),a0 ; far offset: based at a2+544
move d2,d1 ; field width taken from the crunched data
loff5: move.l a3,a6 ; continue at loff6
; --- lbits: read d1 bits from the stream, return them in d1.w, jump to (a6) ---
lbits: and.l d4,d6 ; clear the upper word so it only receives the new field
sub d1,d7
bpl.s lbits2 ; enough bits in d6
add d7,d1 ; d1 = bits that are available
lsl.l d1,d6 ; shift those into the upper word
move d7,d1 ; d1 = -(missing bits)
move -(a1),d6 ; load the next input word
neg d1 ; d1 = missing bits
add d5,d7 ; d7 = 16 - missing bits, counter for the new word
lbits2: lsl.l d1,d6 ; shift the (rest of the) field into the upper word
move.l d6,d1
swap d1 ; field value in d1.w
jmp (a6) ; continuation chosen by the caller
; This part is not executed very often. Some files may decrunch much slower
; on the 68020/68030.
; --- literal run: copy d3+1 bytes straight from the bit stream ---
llen3b: add d1,d3 ; d3 = 13 + 5-bit field value
l2ins: subq #8,d7
bmi.s l2ins1 ; byte straddles two input words
rol #8,d6 ; top 8 bits into the low byte
move.b d6,-(a2)
dbf d3,l2ins
bra.s ltest
l2ins1: move d7,d1 ; same word-boundary handling as lins1
addq #8,d7
lsl.l d7,d6
move -(a1),d6
neg d1
lsl.l d1,d6
addq #8,d7
swap d6
move.b d6,-(a2)
swap d6
dbf d3,l2ins
bra ltest